#!/usr/bin/env python
# -*- coding: utf-8 -*-
#from __future__ import division, with_statement
'''
Copyright 2013, 陈同 (chentong_biology@163.com).  
===========================================================
'''
__author__ = 'chentong & ct586[9]'
__author_email__ = 'chentong_biology@163.com'
#=========================================================
desc = '''
Functional description:
    This is designed to transfer RNAfold predicted dot-bracket
    notification to bed format.

Input fiel format:
    RNAfold output(here only lists parts of them)
    >P_19127_3
    CUGACAACUUCAUCAGUACUUGUUCCUCAG
    ((((........)))).............(  (-79.40)
    >P_19127_3
    CUGACAACUUCAUCAGUACUUGUUCCUCAG
    ((((........)))).............(  (-64.40)
    >P_19127_3
    CUGACAACUUCAUCAGUACUUGUUCCUCAG
    ((((........)))).............( (-29.30)
'''

import sys
import os
from time import localtime, strftime 
timeformat = "%Y-%m-%d %H:%M:%S"
from optparse import OptionParser as OP

def cmdparameter(argv):
    if len(argv) == 1:
        global desc
        print >>sys.stderr, desc
        cmd = 'python ' + argv[0] + ' -h'
        os.system(cmd)
        sys.exit(1)
    usages = "%prog -i file"
    parser = OP(usage=usages)
    parser.add_option("-i", "--input-file", dest="filein",
        metavar="FILEIN", help="RNAfold output file. Normally \
generated by running RNAfold <seq.fa >seq.2ndstrut ")
    parser.add_option("-b", "--bed", dest="bed",
        help="The bed file used to get seq.fa. If you use sequences \
downloaded from other places, you can try using \
locatemRNAToGenomeUsingBlat.py to get their genome coordinates.")
    parser.add_option("-v", "--verbose", dest="verbose",
        default=0, help="Show process information")
    parser.add_option("-d", "--debug", dest="debug",
        default=False, help="Debug the program")
    (options, args) = parser.parse_args(argv[1:])
    assert options.filein != None, "A filename needed for -i"
    return (options, args)
#--------------------------------------------------------------------

def getPos(seq_structure_L, posL):
    '''
    seq_structure_L = [key, sequence, dot-bracket notification]
    '''
    strand = posL[5]
    seq   = seq_structure_L[1]
    struc = seq_structure_L[2].split(' ')[0]
    if strand == '-':
        seq = list(seq)
        seq.reverse()
        struc = list(struc)
        struc.reverse()
    #--------------------------
    chr   = posL[0]
    start = int(posL[1])
    end   = int(posL[2])
    key   = posL[3]
    value = posL[4]
    for i in range(start, end):
        index = i - start
        print "%s\t%d\t%d\t%s\t%s\t%s\t%s\t%s" % \
            (chr,i,i+1,key, value, strand, 
            seq[index], struc[index])
    #-----------------------------------------------
#----------------------------------------

def main():
    options, args = cmdparameter(sys.argv)
    #-----------------------------------
    file = options.filein
    bed  = options.bed
    verbose = options.verbose
    debug = options.debug
    #-----------------------------------
    bedDict = {}
    for line in open(bed):
        lineL = line.split()
        key = lineL[3]
        assert key not in bedDict, "Duplicate keys %s" % key
        bedDict[key] = lineL
    if file == '-':
        fh = sys.stdin
    else:
        fh = open(file)
    #--------------------------------
    seq_structure_L = []
    '''
    seq_structure_L = [key, sequence, dot-bracket notification]
    '''
    for line in fh:
        if line[0] == '>':
            if seq_structure_L:
                getPos(seq_structure_L, bedDict[key])
            key = line[1:-1]
            seq_structure_L = [key]
        else:
            seq_structure_L.append(line.strip())
    #-------------END reading file----------
    #----close file handle for files-----
    if file != '-':
        fh.close()
    #-----------end close fh-----------
    if verbose:
        print >>sys.stderr,\
            "--Successful %s" % strftime(timeformat, localtime())
if __name__ == '__main__':
    startTime = strftime(timeformat, localtime())
    main()
    endTime = strftime(timeformat, localtime())
    fh = open('python.log', 'a')
    print >>fh, "%s\n\tRun time : %s - %s " % \
        (' '.join(sys.argv), startTime, endTime)
    fh.close()



